Course: Visual Analytics for Policy and Management

Final Project

Group 3: Dave Coomes, Quinhas Fernandes, Isabella Sun, Long Zong

This tutorial includes 7 figures:

  1. Univariate: Mozambique Community Health Workers contribution on Family Planning

  2. Univariate: Province level gap to achieve under five mortality reduction goals

  3. Bivariate: Under Five Mortality Distribution by Region in Mozambique

  4. Multivariate: Radar plot- Main study variables by province

  5. Multivariate: Factors that may influence the mortality rate in Mozambique over time

  6. Multivariate: Regression Confidence Intervals - Effect of Health Human resources and Service Utilization on Child Mortality

  7. Maps: Change in under 5 mortality rate from 2000-2010 by province

For this final project, we used data from two sources. The first is the Under Five Mortality Dataset. The second is the Community Health Workers dataset.

Under Five Mortality dataset - Provincial-level under-5, infant, and neonatal mortality from 2000 to 2010 in Mozambique was estimated using data from the 2003 and 2011 DHS and the 2008 Multiple Indicator Cluster Survey (MICS). These three datasets were merged and used to calculate the provincial-level probability of a child dying for each year of the 11-year period using direct life-table estimation methods.

Community Health Workers dataset - To understand the extent to which the program implementation was achieving the expected results, we used data from two main sources (aggregated into one dataset): the Health Information System (HIS) and the population census. The HIS provided the number of CHWs by province and the number of new users of family planning. From the census we abstracted the number of women of reproductive age.

Univariate

Load libraries and data

Create data frame

# Reduce the CHW family-planning data to the two columns used by the plot.
chw_pf1 <- data.frame(chw_fp[c("province", "ape_contrib")])
tableFreq <- as.data.frame(chw_pf1)
names(tableFreq) <- c("province", "ape_contrib")

# Sort the provinces from the lowest to the highest contribution.
tableFreqO <- tableFreq[order(tableFreq[["ape_contrib"]]), ]

# Gap relative to the target of 10; only strictly positive gaps count as
# "Above Target".
tableFreqO[["gap"]] <- tableFreqO[["ape_contrib"]] - 10
tableFreqO[["Target"]] <- ifelse(
  tableFreqO[["gap"]] > 0,
  "Above Target",
  "Below Target"
)

Create Lolliplot

## Misc plot elements: title, subtitle and caption shown on the figure
loli_title <- "Mozambique Community Health Workers contribution on Family Planning"
loli_subtitle <- "2017 province level gap on CHW contribution"
loli_caption <- "Fig.1: Represents the contribution of each province to achieve the 10% target (centered at 0)
in 2017 (Gap analysis). Provinces are plotted from low to high performance.
Source:Health Information System"

# Base layer: one x position per province, coloured by target status and
# labelled with the gap rounded to one decimal.
base <- ggplot(
  tableFreqO,
  aes(province, gap, color = Target, label = round(gap, 1))
)

# Stems running from the zero line to each province's gap.
lolliplot1 <- base +
  geom_segment(aes(y = 0, x = province, yend = gap, xend = province))

# Heads of the lollipops.
lolliplot2 <- lolliplot1 + geom_point()

# Keep the provinces on the x axis in the row order of tableFreqO.
lolliplot3 <- lolliplot2 + scale_x_discrete(limits = tableFreqO$province)

# Value labels, titles, theme and the dashed reference line at gap = 0.
lollitplot4 <- lolliplot3 +
  geom_text(size = 3, nudge_x = 0.35, nudge_y = 0.1, show.legend = FALSE) +
  labs(
    title = loli_title,
    subtitle = loli_subtitle,
    x = "Province",
    y = "% points of the FP GAP",
    caption = loli_caption
  ) +
  theme(
    panel.background = element_rect(fill = "gray98", colour = "black"),
    plot.caption = element_text(hjust = 0),
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.box.just = c("right", "center"),
    axis.text.x = element_text(size = 7, angle = 60, vjust = 1, hjust = 1)
  ) +
  geom_hline(
    yintercept = 0,
    linetype = "dashed",
    color = "black",
    size = 0.9,
    alpha = 0.8
  )
lollitplot4

Load libraries and data

# Location of the child mortality dataset (Stata .dta file).
link <- "https://github.com/ihsun-uw/Group3_Final_Project/raw/master/child_mortality.dta"

# Read the file straight from the URL connection.
df <- read_dta(url(link))

Create data frame

# Keep the rows for 2010 and sort them by combined under-five mortality.
df_2010 <- df[which(df$year == 2010), ]
df_2010 <- df_2010[order(df_2010[["U5M_COMB"]]), ]

# Gap relative to the value 75; a strictly positive gap is flagged as
# "-Not Achieved".
df_2010[["gap"]] <- df_2010[["U5M_COMB"]] - 75
df_2010[["MDG_Target"]] <- ifelse(
  df_2010[["gap"]] > 0,
  "-Not Achieved",
  "Achieved"
)

Create Lolliplot

# Base layer: one x position per province, coloured by MDG status and
# labelled with the gap rounded to one decimal.
base <- ggplot(
  df_2010,
  aes(
    x = reorder(provname, gap),
    y = gap,
    color = MDG_Target,
    label = round(gap, 1)
  )
)

# Stems running from the zero line to each province's gap.
lolliplot1 <- base +
  geom_segment(aes(y = 0, x = provname, yend = gap, xend = provname))

# Heads of the lollipops.
lolliplot2 <- lolliplot1 + geom_point()

# Keep the provinces on the x axis in the row order of df_2010.
lolliplot3 <- lolliplot2 + scale_x_discrete(limits = df_2010$provname)

# Caption text. This is the second figure of the tutorial, so it is numbered
# Fig.2. The blank lines and indentation of the caption are kept as authored.
mdg_caption <- paste0(
  "Fig.2: Represents province level gap to achieve 2/3 reduction on under five mortality rate (75 per 1000 live births target centered at 0).\n",
  "      \n",
  "       \n",
  "       Source:Demographic Health Surveys"
)

# Value labels, titles, theme and the dashed reference line at gap = 0.
lollitplot4 <- lolliplot3 +
  geom_text(nudge_x = 0.35, nudge_y = 0.1, show.legend = FALSE, size = 3) +
  labs(
    title = "Mozambique 2010 Gap analysis to achieve MDG 4 Goal",
    subtitle = "Province level gap to achieve under five mortality reduction goal",
    x = "Province",
    y = "U5 mortality rate gap (per 1000 LB)",
    caption = mdg_caption
  ) +
  theme(
    panel.background = element_rect(fill = "grey96", colour = "grey50"),
    plot.caption = element_text(hjust = 0, size = 8),
    plot.title = element_text(hjust = 0.5, size = 12, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, face = "bold"),
    legend.box.just = c("right", "center"),
    axis.text.y = element_text(size = 7),
    axis.text.x = element_text(size = 7, angle = 45, vjust = 1, hjust = 1),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 8)
  ) +
  geom_hline(
    yintercept = 0,
    linetype = "dashed",
    color = "black",
    size = 0.9,
    alpha = 0.8
  )

lollitplot4

Bivariate

# Load the child mortality data for this section from the same URL as before.
child_mort <- read_dta(url(link))

# Box plot of under-five mortality per province. The plot reads the data
# loaded just above, so this section no longer depends on `df`.
# reorder() with a negated value puts the provinces in descending order of
# their mean U5M_COMB (mean is reorder()'s default summary).
g <- ggplot(child_mort, aes(x = reorder(provname, -U5M_COMB), y = U5M_COMB))
g +
  geom_boxplot(varwidth = TRUE, fill = "grey80") +
  labs(
    title = "Under Five mortality Distribution by Region in Mozambique",
    caption = "Source: DHS",
    x = "Province",
    y = "Under Five Mortality (per thousand)"
  ) +
  theme(
    panel.background = element_rect(fill = "gray98", colour = "black"),
    plot.caption = element_text(hjust = 0),
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.box.just = c("right", "center"),
    axis.text.x = element_text(size = 7, angle = 45, vjust = 1, hjust = 1)
  )

Multivariate

df1 <- df

# Mean of each study variable per province.
df_aggre1 <- aggregate(
  cbind(birthatend, hwdensity, midwifedensity, mddensity) ~ provname,
  data = df1,
  FUN = mean
)

# Drop the fifth row of the aggregated table.
# NOTE(review): the row is removed by position; confirm which province this is.
df_aggre1 <- df_aggre1[-5, ]

# Rank the provinces by their smallest value across the four indicators
# (columns 2 to 5); ties keep their row order.
df_aggre1$min <- rank(apply(df_aggre1[, 2:5], 1, min), ties.method = "first")

# Province names sorted by that ranking.
prov_fact <- as.factor(df_aggre1$provname[order(df_aggre1$min)])

# Turn the province name into an ordered factor that follows the ranking.
df_aggre1$provname <- factor(
  df_aggre1$provname,
  levels = prov_fact,
  labels = prov_fact,
  ordered = TRUE
)

# The helper ranking column is no longer needed.
df_aggre1$min <- NULL

# Human-readable column names.
names(df_aggre1) <- c(
  "provname",
  "Health Professional Present at Birth",
  "Health Worker Density",
  "Midwife Density",
  "Medical Doctor Density"
)

Create radar plot

# One radar per province, drawn without a legend.
base <- ggRadar(df_aggre1, aes(group = "provname"), legend.position = "none")

# Caption text. This is the fourth figure of the tutorial, so it is numbered
# Fig.4. The rest of the wording and spacing is kept as authored.
radar_caption <- paste0(
  "Fig.4: Describes how provinces are performing on study variables : Health professional present at birth, health worker density, midwife density, and medical doctor density. \n",
  "\n",
  "Source:Health Information System"
)

# Facet the radars by province over two rows and add titles and theme.
radar1 <- base +
  facet_wrap(~provname, nrow = 2) +
  labs(
    title = "Radar plot: Province main study variables",
    caption = radar_caption
  ) +
  theme(
    panel.background = element_rect(fill = "gray90"),
    plot.caption = element_text(hjust = 0, size = 10),
    plot.title = element_text(hjust = 0.5, size = 14)
  )

radar1

Create plot of factors that can affect mortality rates

# Colour of each indicator. The same vector drives the point legend (through
# scale_color_manual) and the trend lines, so legend and lines always agree.
density_colours <- c(
  "Health Worker Density" = "purple",
  "Midwife Density" = "green",
  "Medical Doctor Density" = "red"
)

# Yearly scatter of the three densities, each with a loess trend line and its
# confidence band.
scat44 <- ggplot(df) +
  geom_point(
    aes(x = year, y = hwdensity, color = "Health Worker Density"),
    size = 0.8, alpha = 1 / 3
  ) +
  geom_smooth(
    aes(x = year, y = hwdensity),
    method = "loess", se = TRUE, size = 1,
    color = density_colours[["Health Worker Density"]]
  ) +
  geom_point(
    aes(x = year, y = midwifedensity, color = "Midwife Density"),
    size = 0.8, alpha = 1 / 4
  ) +
  geom_smooth(
    aes(x = year, y = midwifedensity),
    method = "loess", se = TRUE, size = 1,
    color = density_colours[["Midwife Density"]]
  ) +
  geom_point(
    aes(x = year, y = mddensity, color = "Medical Doctor Density"),
    size = 0.8, alpha = 1 / 5
  ) +
  geom_smooth(
    aes(x = year, y = mddensity),
    method = "loess", se = TRUE, size = 1,
    color = density_colours[["Medical Doctor Density"]]
  ) +
  # Without this scale the points take the default palette, which does not
  # match the hard-coded line colours and makes the legend misleading.
  scale_color_manual(values = density_colours) +
  labs(
    title = "Factors that may Influence the Mortality Rate in Mozambique",
    y = " Variable Density (per 1000 live births)",
    x = "Years"
  ) +
  scale_x_continuous(breaks = c(2000, 2002, 2004, 2006, 2008, 2010)) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    plot.caption = element_text(hjust = 0),
    plot.title = element_text(hjust = 0.5)
  )

scat44

Run regressions

Create visualization of regression confidence intervals

# Stack the three model tables into one table for plotting.
allModels <- rbind(model1_t, model2_t, model3_t)

# Dot-and-whisker plot, one panel per model, with a vertical reference line
# at zero. This is the sixth figure of the tutorial, so the caption is
# numbered Fig.6; its spelling is corrected as well.
dwplot(allModels) +
  facet_wrap(~model) +
  geom_vline(xintercept = 0, colour = "black", linetype = 4) +
  labs(
    title = "Effect of Health Human resources and Service \n           Utilization on Child Mortality",
    caption = "Fig.6: Shows regression coefficients and 95% CI for birth attendance,
midwife density and total health human resources on child mortality.

Source:Health Information System & DHS"
  ) +
  theme(
    panel.background = element_rect(fill = "gray97", colour = "black"),
    plot.caption = element_text(hjust = 0, size = 8),
    plot.title = element_text(hjust = 0.5, size = 12),
    axis.text.y = element_text(angle = 45, vjust = 0.6)
  )

Maps

Bringing in mortality and map data

#link="https://github.com/quinhasf/pubpol-599/raw/master/ape_analysis.dta"
#chw_fp <- read_dta(url(link))

# Reload the child mortality data.
link <- "https://github.com/ihsun-uw/Group3_Final_Project/raw/master/child_mortality.dta"
df <- read_dta(url(link))

# Zipped shapefile with the Mozambique map.
zip_mozmap_SHP <- "https://github.com/ihsun-uw/Group3_Final_Project/raw/master/Mozambique%20shape%20maps.zip"
library(utils)

# Download the archive to a temporary file. mode = "wb" forces a binary
# transfer so the zip cannot be altered by text-mode line-ending conversion.
temp <- tempfile()
download.file(zip_mozmap_SHP, temp, mode = "wb")

# Extract into the working directory, which is where the shapefile is read
# from afterwards, then remove the temporary archive.
unzip(temp)
unlink(temp)
# notice the parameters used in the chunk!!

library(rgdal)

# Read the province shapefile from the working directory.
mozzipMap <- readOGR("MOZ-level_1.shp", stringsAsFactors = FALSE)

# Attach the data to the map by province number; with all.x = FALSE only
# provinces present in df_new are kept.
layerContrib_2 <- merge(
  mozzipMap,
  df_new,
  by.x = "province_num",
  by.y = "province_num",
  all.x = FALSE
)

# Variable to map and the colour scale settings.
varToPlot_2 <- layerContrib_2$U5M_COMB
numberOfClasses <- 5
colorForScale <- "OrRd"
colors <- brewer.pal(numberOfClasses, colorForScale)

# Split the variable into classes with k-means and give each observation the
# colour of its class.
intervals <- classIntervals(
  varToPlot_2,
  numberOfClasses,
  style = "kmeans",
  dataPrecision = 0
)
colorPallette <- findColours(intervals, colors)

Generating one more data set for mapping - change in u5 MR

# Attach the data to the map by province number; with all.x = FALSE only
# provinces present in df_new are kept.
layerContrib_3 <- merge(
  mozzipMap,
  df_new,
  by.x = "province_num",
  by.y = "province_num",
  all.x = FALSE
)

# Read the change variable from the layer built just above. The original took
# it from layerContrib_2, which tied this step to a different chunk.
varToPlot_3 <- layerContrib_3$u5_change
numberOfClasses <- 5
colorForScale <- "YlOrRd"
colors <- brewer.pal(numberOfClasses, colorForScale)

# Split the variable into classes with k-means and give each observation the
# colour of its class.
intervals <- classIntervals(
  varToPlot_3,
  numberOfClasses,
  style = "kmeans",
  dataPrecision = 0
)
colorPallette <- findColours(intervals, colors)

Map

# Text elements of the map.
legendText <- "Change in u5 mortality"
shrinkLegend <- 1
title <- "Percent change in u5 mortality rate in Mozambique by province \n(2000-2010)"

# Draw the full map in red first, so any area not covered by the data layer
# stays red.
plot(mozzipMap, col = "red", main = title, border = "black", lwd = 1)

# Overlay the data layer, coloured by class, on top of the base map.
plot(layerContrib_3, col = colorPallette, border = NA, add = TRUE)

# Legend built from the class table and palette stored on colorPallette.
legend(
  "topright",
  legend = names(attr(colorPallette, "table")), # class labels
  fill = attr(colorPallette, "palette"),        # class colours
  cex = shrinkLegend,                           # text size
  bty = "n",                                    # no box
  title = legendText
)